*****************************************************************************************************************************
* This replication file determines the presence of family ties between notaries and their predecessors based on last names. *
*****************************************************************************************************************************
* 1. Sample and data management
* 2. Create name variables for matching between predecessor notaries
* 3. Results, replicating Table A1, Panel C.


* Reset the session (data, matrices, Mata) and switch off output paging.
clear
clear matrix
clear mata
set more off
* Quote the global so that a project path containing spaces is passed to -cd- as one argument.
cd "$main_directory"

* Load the semicolon-delimited notary file, keeping the original capitalisation of column names.
* A forward slash is used as the directory separator because Stata accepts it on every platform,
* whereas a backslash only works on Windows.
import delimited using "Data_Belgium/notaris_be_persons.csv", delimiters(";") case(preserve) clear
	//Pred_name1, Pred_start1, Pred_end1: name, start date and end date of notary's 1st predecessor, etc...
* Drop the columns that are not used below; the *_short variants replace the long Name and Office.
drop Name Office Legal_form Language  Email Tel Fax Website _url
rename Office_short Office
rename Name_short Name
* Drop every variable stored between Pred_name5 and Pred_end23 in dataset order.
drop Pred_name5-Pred_end23 


*****************************
* 1 Sample, data management *
*****************************

* 1.1 Treat the current notary as "predecessor 0"
rename Name Pred_name0
replace Pred_name0 = subinstr(Pred_name0, "Notaris ", "", .) // strip every "Notaris " occurrence from the name
rename Time_since_operation Pred_start0

* 1.2 Remove duplicates in associations, and remove missing 1st predecessors
codebook Office VAT_number // one duplicate
drop if Office=="NOTÉRIS, Pierre-Edouard"
codebook Office VAT_number // no more duplicates
* Number of observations sharing the same VAT number (i.e. notaries per office)
egen NotPerOff = count(VAT_number), by(VAT_number)

* Keep only observations for which the name of the first predecessor is known
drop if missing(Pred_name1)
tab NotPerOff // only 28 observations with info on predecessors in associations with 2 notaries

* Remove duplicates relating to non-immediate predecessors, one notary at a time
foreach notary in "CLERENS Vanessa" "TACK Robbe" "SIFFERT Gert" "LENAERTS Raf" ///
		"DENRUYTER Kathleen" "VAN BILSEN Géraldine" "BRACK Mélanie" ///
		"CATINUS François" "PIRARD Bruno" "QUENON Gaëtan" ///
		"VERMEERSCH Annelien" "DE GRAVE Eric" "TAILLY Pieterjan" {
	drop if Pred_name0=="`notary'"
}

tab NotPerOff // only 15 observations with info on predecessors in associations with 2 notaries (because no associations before 1999)
* Restrict the sample to offices with at most one remaining notary, then discard the helper count
keep if NotPerOff<=1
drop NotPerOff


*********************************************************************
* 2 Create name variables for matching between predecessor notaries *
*********************************************************************

/*	- names are multiple words, starting with words for last name, then for first name
	- first match on first word of last name
	- for uninformative first words (e.g. van, de, le, ...) also match on second word */

* Break names down to find words of last name and first name (current notary and two predecessors)
foreach x of numlist 0/2 {
	replace Pred_name`x'=lower(Pred_name`x') //make lower cases to simplify the match
	split Pred_name`x',gen(Pred`x'_) // e.g. Pred1_1, Pred1_2 are 1st, 2nd word of Pred_name1
}

* Flag current notaries whose first name word is uninformative (van, de, den, der, le, la, du).
* The flag is computed once so that both generations use exactly the same list.
tempvar particle
gen byte `particle' = inlist(Pred0_1,"van","de","den","der","le","la","du")

* 2.1 / 2.2 Family link with the predecessor ONE (g=1) and TWO (g=2) generations ago
foreach g of numlist 1/2 {
	* Link if the first word of the last name is equal; missing if either first word is empty
	gen link`g'_0 = (Pred0_1==Pred`g'_1) & (Pred0_1~="")
	replace link`g'_0=. if Pred0_1==""|Pred`g'_1==""
	* 2.1b / 2.2b When the first word is uninformative, also require the second word to be equal.
	* The check is only defined for flagged names and is missing if either second word is empty.
	gen link`g'_0_check = (Pred0_2==Pred`g'_2) & (Pred0_2~="") if `particle'
	replace link`g'_0_check=. if Pred0_2==""|Pred`g'_2==""
	* Undo a first-word link that the second-word check contradicts
	replace link`g'_0=0 if (link`g'_0==1) & (link`g'_0_check==0)
}

* Link one or two generations ago, built from the corrected link1_0 and link2_0.
* Arithmetic propagates missing values, so link12_0 is missing whenever link1_0 or link2_0 is missing.
gen link12_0=link1_0+(1-link1_0)*link2_0

sum link1_0 //link one generation ago for current notary
sum link2_0 //links two generations ago for current notary
sum link12_0 // links one or two generations ago for current notary

* Remove all helper variables. Wildcards are used instead of fixed ranges such as Pred0_1-Pred0_6
* so that the drop works whatever number of words -split- produced for the longest name.
drop Pred0_* Pred1_* Pred2_* link1_0_check link2_0_check `particle'


*************
* 3 Results *
*************
* Starting year of the current notary: last four characters of Pred_start0, converted to numeric
gen Start0 = substr(Pred_start0,-4,4) 
destring Start0 , replace
sum Start0
* Stata orders missing values above every number, so an unguarded "Start0>2015" would also
* select notaries without a starting year. Report how many there are; they are excluded below.
count if missing(Start0)

* Columns of Table A1: first column is link1_0, second column is link12_0.
* For each column: full sample first, then one row per starting-year cohort.
foreach v of varlist link1_0 link12_0 {
	sum `v'
	sum `v' if Start0<1991
	sum `v' if Start0>1990&Start0<1996
	sum `v' if Start0>1995&Start0<2001
	sum `v' if Start0>2000&Start0<2006
	sum `v' if Start0>2005&Start0<2011
	sum `v' if Start0>2010&Start0<2016
	sum `v' if Start0>2015 & !missing(Start0) //links are strongly falling in recent years
}
